################################################################################
## setup
################################################################################
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("foreign",
         "countrycode",
         "tidyverse",
         "RColorBrewer", 
         "gridExtra",
         "tools", 
         "lfe",
         "stargazer")
lapply(pkg, require, character.only = TRUE)

# set directory
MAIN_DIR <- "~/Dropbox/Research/Liao_Newman_Malhotra/replication/"

# set file directories
FIG_DIR <- paste(MAIN_DIR, "figures/", sep = "")
TABLE_DIR <- paste(MAIN_DIR, "tables/", sep = "")
RESULTS <- paste(MAIN_DIR, "results/", sep = "")

# create folder if doesn't exist
if (!dir.exists(FIG_DIR)) {
  print("Directory does not exist! Creating...")
  dir.create(FIG_DIR)
}
if (!dir.exists(TABLE_DIR)) {
  print("Directory does not exist! Creating...")
  dir.create(TABLE_DIR)
}
if (!dir.exists(RESULTS)) {
  print("Directory does not exist! Creating...")
  dir.create(RESULTS)
}

# load data
load(file = paste(MAIN_DIR, "price-merge.RData", sep = ""))
load(file = paste(MAIN_DIR, "foia-aggregate.RData", sep = ""))


################################################################################
## create sub-samples
################################################################################
# set df
df <- price.merge

# subset
df.12.13 <- df %>%
  filter(year == 2012 | year == 2013)

df.11.12 <- df %>%
  filter(year == 2011 | year == 2012)

df.12.14 <- df %>%
  filter(year == 2012 | year == 2014)

df.12.15 <- df %>%
  filter(year == 2012 | year == 2015)


################################################################################
# set var and var name correspondence
################################################################################
var.df <- tibble(var = c("anticorrupt",
                         "anticorrupt_lead_1",
                         "anticorrupt:ln_cn_ug_2012",
                         "anticorrupt:ln_cn_g_2012",
                         "anticorrupt:ln_ind_ug_2012",
                         "anticorrupt_lead_1:ln_cn_ug_2012",
                         "anticorrupt_lead_1:ln_cn_g_2012",
                         "anticorrupt_lead_1:ln_ind_ug_2012",
                         
                         # controls
                         "employ_zcta", 
                         "share_pop_adult_zcta",
                         "ln_pop_density_zcta",
                         "share_pop_white_zcta",
                         "median_household_income_zcta",
                         "share_enroll_zcta",
                         "ln_enroll_zcta",
                         "effective_tax_zcta", 
                         "vacancy_zcta",
                         
                         "anticorrupt:employ_zcta",
                         "anticorrupt:share_pop_adult_zcta",
                         "anticorrupt:ln_pop_density_zcta",
                         "anticorrupt:share_pop_white_zcta",
                         "anticorrupt:median_household_income_zcta",
                         "anticorrupt:share_enroll_zcta",
                         "anticorrupt:ln_enroll_zcta",
                         "anticorrupt:effective_tax_zcta",
                         "anticorrupt:vacancy_zcta",
                         
                         "anticorrupt_lead_1:employ_zcta",
                         "anticorrupt_lead_1:share_pop_adult_zcta",
                         "anticorrupt_lead_1:ln_pop_density_zcta",
                         "anticorrupt_lead_1:share_pop_white_zcta",
                         "anticorrupt_lead_1:median_household_income_zcta",
                         "anticorrupt_lead_1:share_enroll_zcta",
                         "anticorrupt_lead_1:ln_enroll_zcta",
                         "anticorrupt_lead_1:effective_tax_zcta",
                         "anticorrupt_lead_1:vacancy_zcta"),
                 var_name = c("2013 Anti-corruption Campaign (Dummy)",
                              "Placebo Anti-corruption (2012)",
                              "2013 Anti-corruption x Log Chinese Undergrads in ZIP Code (2012)",
                              "2013 Anti-corruption x Log Chinese Grads in ZIP Code (2012)",
                              "2013 Anti-corruption x Log Indian Undergrads in ZIP Code (2012)",
                              "Placebo Anti-corruption (2012) x Log CHN Undergrads in 2012",
                              "Placebo Anti-corruption (2012) x Log CHN Grads in 2012",
                              "Placebo Anti-corruption (2012) x Log IND Undergrads in 2012",
                              
                              # controls
                              "Share of Employment",
                              "Share of Adult Pop.",
                              "Log Population Density",
                              "Share of White Pop.",
                              "Median Household Income (10,000)",
                              "Share Enrolled in College or Above",
                              "Log Pop. Enrolled in College or Above",
                              "Effective Tax Rate (\\%)",
                              "Share of Vacant Houses",
                              
                              "2013 Anti-corruption x Share of Employment",
                              "2013 Anti-corruption x Share of Adult Pop.",
                              "2013 Anti-corruption x Log Population Density",
                              "2013 Anti-corruption x Share of White Pop.",
                              "2013 Anti-corruption x Median Household Income (10,000)",
                              "2013 Anti-corruption x Share Enrolled in College or Above",
                              "2013 Anti-corruption x Log Pop. Enrolled in College or Above",
                              "2013 Anti-corruption x Effective Tax Rate (\\%)",
                              "2013 Anti-corruption x Share of Vacant Houses",
                              
                              "Placebo Anti-corruption (2012) x Share of Employment",
                              "Placebo Anti-corruption (2012) x Share of Adult Pop.",
                              "Placebo Anti-corruption (2012) x Log Population Density",
                              "Placebo Anti-corruption (2012) x Share of White Pop.",
                              "Placebo Anti-corruption (2012) x Median Household Income (10,000)",
                              "Placebo Anti-corruption (2012) x Share Enrolled in College or Above",
                              "Placebo Anti-corruption (2012) x Log Pop. Enrolled in College or Above",
                              "Placebo Anti-corruption (2012) x Effective Tax Rate (\\%)",
                              "Placebo Anti-corruption (2012) x Share of Vacant Houses"))
                             
# function to replace variable names
replaceVarName <- function(var.vec, var.df){
  # Prepare output vector
  out.vec <- rep(NA, length(var.vec))
  matches <- match(var.vec, var.df$var)
  out.vec <- var.df[matches,]$var_name
  
  if(any(is.na(out.vec))){
    warning(paste("Variable concordence missing: ", 
                  paste(var.vec[is.na(out.vec)], collapse = ", "), 
                  sep = ""))
  } else{
    #print("All variables successfully converted")
  }
  
  return(out.vec)
}


################################################################################
## Table S1. DiD Regression Results: Home Prices
################################################################################
# Column (1)
con.b.12.13 <- felm(ln_value_per_year ~ 
                      anticorrupt +
                      ln_cn_ug_2012:anticorrupt 
                    | zcta | 0 | zcta, 
                    data = df.12.13)
summary(con.b.12.13)

# Column (2)
con.os.12.13 <- felm(ln_value_per_year ~ 
                       anticorrupt +
                       ln_cn_ug_2012:anticorrupt +
                       ln_cn_g_2012:anticorrupt +
                       ln_ind_ug_2012:anticorrupt 
                     | zcta | 0 | zcta, 
                     data = df.12.13)
summary(con.os.12.13)

# Column (3)
con.f.12.13 <- felm(ln_value_per_year ~ 
                      anticorrupt +
                      ln_cn_ug_2012:anticorrupt +
                      ln_cn_g_2012:anticorrupt +
                      ln_ind_ug_2012:anticorrupt +
                      employ_zcta + share_pop_adult_zcta + 
                      ln_pop_density_zcta + share_pop_white_zcta + 
                      median_household_income_zcta + share_enroll_zcta +
                      ln_enroll_zcta + effective_tax_zcta + 
                      vacancy_zcta +
                      employ_zcta:anticorrupt + share_pop_adult_zcta:anticorrupt +
                      ln_pop_density_zcta:anticorrupt + share_pop_white_zcta:anticorrupt +
                      median_household_income_zcta:anticorrupt + share_enroll_zcta:anticorrupt +
                      ln_enroll_zcta:anticorrupt + effective_tax_zcta:anticorrupt +
                      vacancy_zcta:anticorrupt
                    | zcta | 0 | zcta, 
                    data = df.12.13)
summary(con.f.12.13)

# Column (4)
con.f.11.12 <- felm(ln_value_per_year ~ 
                      anticorrupt_lead_1 +
                      ln_cn_ug_2012:anticorrupt_lead_1 +
                      ln_cn_g_2012:anticorrupt_lead_1 +
                      ln_ind_ug_2012:anticorrupt_lead_1 +
                      employ_zcta + share_pop_adult_zcta + 
                      ln_pop_density_zcta + share_pop_white_zcta + 
                      median_household_income_zcta + share_enroll_zcta + 
                      ln_enroll_zcta + effective_tax_zcta + 
                      vacancy_zcta +
                      employ_zcta:anticorrupt_lead_1 + share_pop_adult_zcta:anticorrupt_lead_1 + 
                      ln_pop_density_zcta:anticorrupt_lead_1 + share_pop_white_zcta:anticorrupt_lead_1 + 
                      median_household_income_zcta:anticorrupt_lead_1 + share_enroll_zcta:anticorrupt_lead_1 + 
                      ln_enroll_zcta:anticorrupt_lead_1 + effective_tax_zcta:anticorrupt_lead_1 +
                      vacancy_zcta:anticorrupt_lead_1
                    | zcta | 0 | zcta, 
                    data = df.11.12)
summary(con.f.11.12)

# Column (5)
con.f.12.14 <- felm(ln_value_per_year ~ 
                      anticorrupt +
                      ln_cn_ug_2012:anticorrupt +
                      ln_cn_g_2012:anticorrupt +
                      ln_ind_ug_2012:anticorrupt +
                      employ_zcta + share_pop_adult_zcta + 
                      ln_pop_density_zcta + share_pop_white_zcta + 
                      median_household_income_zcta + share_enroll_zcta +
                      ln_enroll_zcta + effective_tax_zcta + 
                      vacancy_zcta +
                      employ_zcta:anticorrupt + share_pop_adult_zcta:anticorrupt +
                      ln_pop_density_zcta:anticorrupt + share_pop_white_zcta:anticorrupt +
                      median_household_income_zcta:anticorrupt + share_enroll_zcta:anticorrupt +
                      ln_enroll_zcta:anticorrupt + effective_tax_zcta:anticorrupt +
                      vacancy_zcta:anticorrupt
                    | zcta | 0 | zcta, 
                    data = df.12.14)
summary(con.f.12.14)

# Column (6)
con.f.12.15 <- felm(ln_value_per_year ~ 
                      anticorrupt +
                      ln_cn_ug_2012:anticorrupt +
                      ln_cn_g_2012:anticorrupt +
                      ln_ind_ug_2012:anticorrupt +
                      employ_zcta + share_pop_adult_zcta + 
                      ln_pop_density_zcta + share_pop_white_zcta + 
                      median_household_income_zcta + share_enroll_zcta +
                      ln_enroll_zcta + effective_tax_zcta + 
                      vacancy_zcta +
                      employ_zcta:anticorrupt + share_pop_adult_zcta:anticorrupt +
                      ln_pop_density_zcta:anticorrupt + share_pop_white_zcta:anticorrupt +
                      median_household_income_zcta:anticorrupt + share_enroll_zcta:anticorrupt +
                      ln_enroll_zcta:anticorrupt + effective_tax_zcta:anticorrupt +
                      vacancy_zcta:anticorrupt
                    | zcta | 0 | zcta, 
                    data = df.12.15)
summary(con.f.12.15)

# set outputs
did.main <- list(con.b.12.13, 
                 con.os.12.13,
                 con.f.12.13,
                 con.f.11.12,
                 con.f.12.14,
                 con.f.12.15)

# set clustered SEs
did.cse <- list(con.b.12.13$cse, 
                con.os.12.13$cse,
                con.f.12.13$cse,
                con.f.11.12$cse,
                con.f.12.14$cse,
                con.f.12.15$cse)

# set p-values
pvalue.main <- list(round(con.b.12.13$cpval, 3),
                    round(con.os.12.13$cpval, 3),
                    round(con.f.12.13$cpval, 3),
                    round(con.f.11.12$cpval, 3),
                    round(con.f.12.14$cpval, 3),
                    round(con.f.12.15$cpval, 3)) 

# set CIs
ci.main <- list(confint(con.b.12.13), 
                confint(con.os.12.13),
                confint(con.f.12.13),
                confint(con.f.11.12),
                confint(con.f.12.14),
                confint(con.f.12.15))

# set model names
mod.names.main <- c("con-base", 
                    "con-extended", 
                    "con-full-12-13",
                    "con-full-11-12",
                    "con-full-12-14",
                    "con-full-12-15")

var.order <- c("^anticorrupt$",
               "^anticorrupt_lead_1$",
               "^anticorrupt:ln_cn_ug_2012$",
               "^anticorrupt:ln_cn_g_2012$",
               "^anticorrupt:ln_ind_ug_2012$",
               "^anticorrupt_lead_1:ln_cn_ug_2012$",
               "^anticorrupt_lead_1:ln_cn_g_2012$",
               "^anticorrupt_lead_1:ln_ind_ug_2012$",
               
               # controls
               "^employ_zcta$", 
               "^share_pop_adult_zcta$",
               "^ln_pop_density_zcta$",
               "^share_pop_white_zcta$",
               "^median_household_income_zcta$",
               "^share_enroll_zcta$",
               "^ln_enroll_zcta$",
               "^effective_tax_zcta$", 
               "^vacancy_zcta$",
               
               "^anticorrupt:employ_zcta$",
               "^anticorrupt:share_pop_adult_zcta$",
               "^anticorrupt:ln_pop_density_zcta$",
               "^anticorrupt:share_pop_white_zcta$",
               "^anticorrupt:median_household_income_zcta$",
               "^anticorrupt:share_enroll_zcta$",
               "^anticorrupt:ln_enroll_zcta$",
               "^anticorrupt:effective_tax_zcta$",
               "^anticorrupt:vacancy_zcta$",
               
               "^anticorrupt_lead_1:employ_zcta$",
               "^anticorrupt_lead_1:share_pop_adult_zcta$",
               "^anticorrupt_lead_1:ln_pop_density_zcta$",
               "^anticorrupt_lead_1:share_pop_white_zcta$",
               "^anticorrupt_lead_1:median_household_income_zcta$",
               "^anticorrupt_lead_1:share_enroll_zcta$",
               "^anticorrupt_lead_1:ln_enroll_zcta$",
               "^anticorrupt_lead_1:effective_tax_zcta$",
               "^anticorrupt_lead_1:vacancy_zcta$")

# set var label
var.label <- str_replace_all(var.order, "\\^", "")
var.label <- str_replace_all(var.label, "\\$", "")

# save
# NOTE that a bug in stargazer causes it to use default p-values instead of 
# replacing them with the p-values we have calculated above. Results are 
# usually the same up to two decimal points. When there is a slight difference, 
# we report in the SI the p-values calculated above. 
sink(file.path(TABLE_DIR, "Table-S1-price.txt"))
#sink(file.path(TABLE_DIR, "Table-S1-price.tex"))
stargazer(did.main,
          se = did.cse,
          #p = pvalue.main, # does not seem to work under report = ("vcsp"), corrected in SI
          p.auto = FALSE,
          t.auto = FALSE,
          ci = TRUE,
          ci.custom = ci.main,
          report = ("vcsp"),
          digits = 3, 
          #type = "latex",
          type = "text",
          title = "DiD Regression Results: Home Prices",
          dep.var.labels = "Median Home Value (\\$) per Square Feet (log)",
          dep.var.labels.include = TRUE,
          #model.numbers = TRUE,
          column.labels = mod.names.main,
          order = var.order,
          covariate.labels = replaceVarName(var.vec = var.label,
                                            var.df = var.df),
          label = "tb:did-price",
          single.row = FALSE,
          df = FALSE,
          font.size = "tiny",
          add.lines = list(c("Fixed Effects: Zip Code Tabulation Area (ZCTA)",
                             rep("Yes", 6)),
                           c("Number of ZIP Codes", 
                             formatC(length(levels(con.b.12.13$clustervar$zcta)), big.mark = ","),
                             formatC(length(levels(con.os.12.13$clustervar$zcta)), big.mark = ","),
                             formatC(length(levels(con.f.12.13$clustervar$zcta)), big.mark = ","),
                             formatC(length(levels(con.f.11.12$clustervar$zcta)), big.mark = ","),
                             formatC(length(levels(con.f.12.14$clustervar$zcta)), big.mark = ","),
                             formatC(length(levels(con.f.12.15$clustervar$zcta)), big.mark = ","))
                           ),
          star.cutoffs = c(0.1, 0.05, 0.01),
          star.char = c("*", "**", "***"),
          #notes = c("Robust standard errors clustered by ZIP code in parentheses. $^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01. ZIP-code fixed effects subsume ZIP-code level measures of international students in 2012."),
          notes = c("95\\% confidence intervals and p-values are presented. Calculations are based on robust standard errors clustered by ZIP code. ZIP-code fixed effects subsume ZIP-code level measures of international students in 2012."),
          notes.align = "l",
          notes.append = FALSE)
sink()


############################################################################
## Figure 4a
############################################################################
# set alpha level
alpha <- 0.05

# set var order
var.period <- c("2011-2012", "2012-2013",
                "2012-2014", "2012-2015")

# extract coefs and clustered SEs
coef.df.period <- tibble(var = factor(var.period, levels = var.period),
                         coef = c(coef(con.f.11.12)["anticorrupt_lead_1:ln_cn_ug_2012"],
                                  coef(con.f.12.13)["anticorrupt:ln_cn_ug_2012"],
                                  coef(con.f.12.14)["anticorrupt:ln_cn_ug_2012"],
                                  coef(con.f.12.15)["anticorrupt:ln_cn_ug_2012"]),
                         cse = c(con.f.11.12$cse["anticorrupt_lead_1:ln_cn_ug_2012"],
                                 con.f.12.13$cse["anticorrupt:ln_cn_ug_2012"],
                                 con.f.12.14$cse["anticorrupt:ln_cn_ug_2012"],
                                 con.f.12.15$cse["anticorrupt:ln_cn_ug_2012"]))

# calculate C.I.
coef.df.period <- coef.df.period %>%
  mutate(multiplier = qnorm(1 - alpha/2),
         lwr = coef - multiplier * cse,
         upr = coef + multiplier * cse)

# df for annotations
txt.shift.h <- 0.004
txt.pos.h <- tibble(var = factor(var.period, levels = var.period),
                    coef = c(coef.df.period[1,]$lwr - txt.shift.h,
                             coef.df.period[2,]$upr + txt.shift.h,
                             coef.df.period[3,]$upr + txt.shift.h,
                             coef.df.period[4,]$upr + txt.shift.h),
                    lwr = coef.df.period$lwr,
                    upr = coef.df.period$upr)

# plot
axis.title.size <- 16

p.coef.period <- ggplot(data = coef.df.period,
                        aes(x = var, y = coef,
                            ymin = lwr, 
                            ymax = upr)) +
  geom_pointrange(size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  scale_y_continuous("Estimated DiD Effect on Home Prices (log)",
                     limits = c(-0.02, 0.04)) +
  geom_text(data = txt.pos.h, 
            aes(label = var),
            size = axis.title.size - 11) +
  ggtitle("2012-Population of Chinese Int'l Undergraduates") +
  theme_bw() +  
  theme(plot.title = element_text(size = axis.title.size + 1,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        legend.position = "none",
        axis.title.y = element_text(size = axis.title.size + 1,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid = element_blank(),
        plot.margin = margin(0.2, 0.2, 0.2, 0.5, unit = "cm")) 

p.coef.period

# # print
# pdf(file = paste(FIG_DIR, "Figure-4a.pdf", sep = "/"), width = 8, height = 5)
# print(p.coef.period)
# dev.off()


############################################################################
## Figure 4b
############################################################################
# set alpha level
alpha <- 0.05

# set var order
var.students <- c("Chinese Graduates\n2012-2013", 
                  "Indian Undergraduates\n2012-2013")

# extract coefs and clustered SEs
coef.df.students <- tibble(cty = c("China", "India"),
                           var = factor(var.students, levels = var.students),
                           coef = c(coef(con.f.12.13)["anticorrupt:ln_cn_g_2012"],
                                    coef(con.f.12.13)["anticorrupt:ln_ind_ug_2012"]),
                           cse = c(con.f.12.13$cse["anticorrupt:ln_cn_g_2012"],
                                   con.f.12.13$cse["anticorrupt:ln_ind_ug_2012"]))
                               
# calculate C.I.
coef.df.students <- coef.df.students %>%
  mutate(multiplier = qnorm(1 - alpha/2),
         lwr = coef - multiplier * cse,
         upr = coef + multiplier * cse)

# df for annotations
txt.shift.s <- 0.004
txt.pos.s <- tibble(var = factor(var.students, levels = var.students),
                    
                    coef = c(coef.df.students[1,]$lwr - txt.shift.s,
                             coef.df.students[2,]$lwr - txt.shift.s),
                    lwr = coef.df.students$lwr,
                    upr = coef.df.students$upr)

# plot
axis.title.size <- 16
shapes.stu <- c(17, 15)

p.coef.students <- ggplot(data = coef.df.students,
                        aes(x = var, y = coef,
                            ymin = lwr, 
                            ymax = upr)) +
  geom_pointrange(aes(shape = cty),
                  size = 0.8) +
  geom_hline(yintercept = 0,
             color = I(hsv(0/12, 7/12, 7/12)), 
             linetype = "dashed") +
  scale_y_continuous("Estimated DiD Effect on Home Prices (log)",
                     limits = c(-0.02, 0.04)) +
  scale_shape_manual(values = shapes.stu) + 
  geom_text(data = txt.pos.s, 
            aes(label = var),
            size = axis.title.size - 11) +
  ggtitle("2012-Population of Placebo Int'l Students") +
  theme_bw() +  
  theme(plot.title = element_text(size = axis.title.size + 1,
                                  face = "bold",
                                  hjust = 0.5,
                                  margin = margin(0, 0, 20, 0)),
        legend.position = "none",
        axis.title.y = element_text(size = axis.title.size + 1,
                                    margin = margin(0, 20, 0, 0)),
        axis.text.y = element_text(size = axis.title.size),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        panel.grid = element_blank(),
        plot.margin = margin(0.2, 0.2, 0.2, 0.5, unit = "cm")) 

p.coef.students

# # print
# pdf(file = paste(FIG_DIR, "Figure-4b.pdf", sep = "/"), width = 7, height = 5)
# print(p.coef.students)
# dev.off()


################################################################################
## Figure 4c
################################################################################
# set countries to plot
cty.vec <- c(156, # China 
             356 # India
             ) 

# subset
foia.sub <- foia.aggregate %>% 
  filter(iso3n %in% cty.vec) %>%
  filter(student_level != "Secondary") %>%
  filter(student_level != "Total") %>%
  filter(!(student_level == "Graduate" & iso3n == 356)) %>%
  mutate(cty_level = paste(cty, student_level, sep = "-"))

# set parameters
axis.title.size <- 16

display.brewer.pal(9, "Set1")
brewer.pal(9, "Set1")

colors <- c(
  "#E41A1C", # china-ug
  "#E41A1C", # china-g
  "black" # india-ug
  )

shapes <- c(17, 16, 15)

txt.size <- axis.title.size - 11

# plot total students
f.stu <- ggplot(foia.sub %>% 
                  filter(year <= 2016),
                aes(x = year, 
                    y = n/1000,
                    group = cty_level,
                    color = cty_level)) +
  geom_point(aes(shape = cty_level), size = 3) +
  geom_line() +
  scale_color_manual(values = colors) +
  scale_shape_manual(values = shapes) +
  scale_y_continuous("Total Population (thousands)",
                     limits = c(0, 250)) +
  scale_x_continuous("Year", 
                     limits = c(1998.5, 2016.5)) +
  ggtitle("Int'l Students in the U.S., 2000-2016") +
  theme_bw() +
  theme(plot.title = element_text(size = axis.title.size + 1,
                                  face = "bold",
                                  margin = margin(0, 0, 20, 0),
                                  hjust = 0.5),
        axis.title.y = element_text(size = axis.title.size + 1,
                                    margin = margin(0, 20, 0, 0)),
        # axis.title.x = element_text(size = axis.title.size,
        #                             margin = margin(20, 0, 0, 0)),
        axis.title.x = element_blank(),
        axis.text = element_text(size = axis.title.size),
        #strip.text = element_text(size = axis.title.size - 1),
        #strip.background = element_blank(),
        panel.grid = element_blank(),
        plot.margin = margin(0.2, 0.2, 0.2, 0.5, unit = "cm"),
        #legend.position = c(0.07, 0.88),
        legend.position = "none") +
  annotate("text", x = 2012, y = 220, 
           label = "Chinese\nUndergraduates", 
           color = "#E41A1C",
           size = txt.size) +
  annotate("text", x = 2015, y = 140, 
           label = "Chinese\nGraduates",
           color = "#E41A1C",
           size = txt.size) +
  annotate("text", x = 2014.5, y = 55, 
           label = "Indian\nUndergraduates",
           color = "black",
           size = txt.size)

f.stu

# # print
# pdf(file = paste(FIG_DIR, "Figure-4c.pdf", sep = "/"),
#     width = 7, height = 5)
# print(f.stu)
# dev.off()


################################################################################
## combine figures
################################################################################
# save
pdf(file = paste(FIG_DIR, "Figure-4.pdf", sep = "/"),
    width = 20.5, height = 5)
grid.arrange(p.coef.period, p.coef.students, f.stu,
             nrow = 1)
dev.off()

# g <- arrangeGrob(p.coef.period, p.coef.students, f.stu,
#                  nrow = 1)
# ggsave(file = paste(FIG_DIR, "Figure-4.eps", sep = "/"), g,
#        width = 20.5, height = 5)
